{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cf840bc2-300f-4fcc-b609-07c0067f6f25",
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import os\n",
    "\n",
    "# Source the proxy script inside bash and capture every environment line containing 'proxy'.\n",
    "proxy_dump = subprocess.run(\n",
    "    'bash -c \"source /etc/network_turbo && env | grep proxy\"',\n",
    "    shell=True,\n",
    "    capture_output=True,\n",
    "    text=True,\n",
    ").stdout\n",
    "\n",
    "# Copy each NAME=VALUE line into this process's environment; lines without '=' are skipped.\n",
    "for env_line in proxy_dump.splitlines():\n",
    "    name, separator, setting = env_line.partition('=')\n",
    "    if separator:\n",
    "        os.environ[name] = setting\n",
    "\n",
    "# Imported only after the proxy variables have been copied into os.environ.\n",
    "from datasets import load_dataset\n",
    "\n",
    "# Login using e.g. `huggingface-cli login` to access this dataset\n",
    "ds = load_dataset(\"omni-research/DREAM-1K\", cache_dir='./video')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7b58b0e3-f8f1-4af7-97b3-9106417ae1ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DatasetDict({\n",
       "    test: Dataset({\n",
       "        features: ['idx', 'video_file', 'source', 'duration', 'description', 'events', 'n_subjects', 'n_shots', 'n_events'],\n",
       "        num_rows: 1000\n",
       "    })\n",
       "})"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the loaded DatasetDict: its splits, column names and row counts.\n",
    "ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "49fb2c78-fda9-4485-9dbb-c50d052a4a26",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'idx': [991, 992, 993, 994, 995, 996, 997, 998, 999, 1000],\n",
       " 'video_file': ['video/991.mp4',\n",
       "  'video/992.mp4',\n",
       "  'video/993.mp4',\n",
       "  'video/994.mp4',\n",
       "  'video/995.mp4',\n",
       "  'video/996.mp4',\n",
       "  'video/997.mp4',\n",
       "  'video/998.mp4',\n",
       "  'video/999.mp4',\n",
       "  'video/1000.mp4'],\n",
       " 'source': ['DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube',\n",
       "  'DREAM/youtube'],\n",
       " 'duration': [10.0, 8.01, 6.01, 7.0, 12.0, 8.0, 6.01, 6.0, 6.0, 12.0],\n",
       " 'description': ['The video shows astronauts in the cockpit of a spacecraft, manipulating several buttons on the control panel which results in the spacecraft shaking violently. An astronaut tries to stabilize the craft by adjusting a control lever, but their terrified and scared expressions indicate the dire state of the situation.',\n",
       "  \"In this video, an individual donned in a brown suit jacket is seen obstinately attempting to operate a television using a remote control. Another individual, clad in a black suit jacket, repeatedly advises him that the remote does not control the TV. Despite this, the person continues to depress the remote's buttons. Eventually, a transparent swan ornament, perched atop the TV, is ejected by a suddenly-opening cover. The swan ornament collides with a hanging glass photo frame, containing a picture of a family – two people and a white dog – causing the glass to shatter.\",\n",
       "  \"In this video, a person wearing a brown suit is observing an oil painting hung on a red wall. The painting has a wooden frame and it portrays an elderly person with long white hair, dressed in a black gown. A green plant can be seen beside the painting, placed on the left side. A gray wall is depicted in the painting's background, with a small picture hung in a black frame at the upper left corner. Suddenly, the person in the brown suit sneezes towards the painting, and upon contact, the spray lands on the painting's face, leaving it marked with water and dirt. The person, instinctively touching their nose after the sneeze, is astonished to see the blemish on the painting.\",\n",
       "  'In this video, an individual dressed in a white shirt, black tie, and black pants approaches a vending machine, clutching a piece of paper bearing a black circle. The individual places the paper on the glass cabinet door of the machine. Astonishingly, they extend their right hand into the black circle on the paper and pass through the glass door, thereby directly retrieving a snack from the machine.',\n",
       "  'A person wearing yellow-striped flip-flops and black socks bends down, breaks off the sodium metal from the tip of a stick on a newspaper, and prepares it to throw it into the water in a plastic bottle. The person performs this experiment on a soil surface with fallen tree leaves. To be careful, the person wearing flip-flops and a gray tracksuit plucks the metal from the end of the stick with their right hand, bends down, cautiously extends their right arm, and puts it into the plastic bottle. Reacting violently with water, this large piece of metal first emits white smoke and then explodes, sending the bottle crashing to the ground.',\n",
       "  'In this video, an individual donned in an orange shirt and gray shorts is seen placing a black and white football on the grass. A referee dressed in a blue shirt and black pants stands behind the player. The player in orange takes a run up and kicks the ball with his right foot. The ball soars towards the goal, and the goalkeeper clad in yellow clothing and blue gloves raises both hands in an attempt to block it, but fails to do so. The ball zooms past the goalkeeper, into the net, catching fire and then transforming into a burst of vibrant fireworks. A person dressed in red cheers on the grass, raising both hands in celebration.',\n",
       "  'The video showcases an individual on the left with long, curly brown hair, dressed in a white sweater, carrying books, walking towards the right. An individual on the right, outfitted in a black coat and blue jeans, possesses a monitor for a head, displaying a constant succession of different profile pictures. The individual on the right abruptly knocks the books from the hands of the person on the left and aggressively shoves the latter against a red wall.',\n",
       "  'The video shows a child with short blond hair in a room, wearing a beige t-shirt, and carrying pants, walking towards a chest of drawers. A second child, with curly black hair dressed in a pink dress, is also present. The room features a chest of drawers, toys, and dolls scattered on the floor. Additionally, two children in purple pyjamas are trying to climb the white chest of drawers, introducing the risk of it falling over them, near a yellow toy truck. One of these children opens the uppermost drawer, while the child in the pink dress attempts to climb onto a black table situated under a screen. These activities are considered high-risk due to the potential of injury from tumbling furniture.',\n",
       "  'The video depicts a blonde-haired person, sporting a black swimsuit and a ponytail, standing on an orange jumping platform. With their back to the right, they face left and raise their hands. Following that, they jump, wrapping their arms around their legs and performing a spin in mid-air. They conclude by diving into the water, hand-first and feet-up, creating a considerable splash.',\n",
       "  'The video features animated characters: a large, overweight gray rabbit and a purple squirrel in a brown field surrounded by greenery and trees. The gray rabbit directs the purple squirrel to a certain location. The purple squirrel starts heading there with a hazelnut in its hand, displaying signs of fear. Stopping at a small green bush, the squirrel seems to inspect it. The camera then pulls back, revealing the bush as a wooden trap. The trap springs, launching the purple squirrel into the air and making it drop the hazelnut. The scene then shifts to the squirrel flying through the air.'],\n",
       " 'events': [['Astronauts manipulate buttons on the control panel.',\n",
       "   'Control panel usage causes spacecraft to shake violently.',\n",
       "   'Astronaut adjusts a control lever.',\n",
       "   'Astronauts display terrified expressions.',\n",
       "   'Astronauts attempt to stabilize the spacecraft.'],\n",
       "  ['Individual tries to operate TV with remote.',\n",
       "   \"Other individual advises remote doesn't control the TV.\",\n",
       "   \"Individual presses remote's buttons.\",\n",
       "   'TV cover suddenly opens.',\n",
       "   'Swan ornament is ejected from TV.',\n",
       "   'Swan ornament collides with photo frame.',\n",
       "   'Glass photo frame shatters.'],\n",
       "  ['A person observes an oil painting.',\n",
       "   'The oil painting portrays an elderly person.',\n",
       "   'A green plant is placed beside the painting.',\n",
       "   'The person in the brown suit sneezes towards the painting.',\n",
       "   \"Spray lands on the painting's face.\",\n",
       "   'The painting is marked with water and dirt.',\n",
       "   'The person touches their nose after sneezing.',\n",
       "   'The person is astonished to see the blemish.'],\n",
       "  ['Individual approaches a vending machine.',\n",
       "   'Individual clutches a piece of paper.',\n",
       "   'Piece of paper bears a black circle.',\n",
       "   'Individual places the paper on the glass door.',\n",
       "   \"Individual's right hand extends towards the paper.\",\n",
       "   'Hand passes through the glass door.',\n",
       "   'Individual retrieves a snack from the machine.'],\n",
       "  ['Person breaks off sodium metal on newspaper',\n",
       "   'Person prepares sodium for tossing into water',\n",
       "   'Person plucks sodium metal with right hand',\n",
       "   'Person bends down with sodium metal',\n",
       "   'Person cautiously extends right arm with sodium',\n",
       "   'Person puts sodium into the plastic bottle',\n",
       "   'Sodium reacts violently with water',\n",
       "   'White smoke emits from the bottle',\n",
       "   'Sodium explosion occurs',\n",
       "   'Bottle crashes to the ground'],\n",
       "  ['Player places football on grass',\n",
       "   'Player kicks ball with right foot',\n",
       "   'Ball soars towards goal',\n",
       "   'Goalkeeper attempts to block ball',\n",
       "   'Goalkeeper fails to stop ball',\n",
       "   'Ball zooms into net',\n",
       "   'Ball catches fire',\n",
       "   'Ball transforms into fireworks',\n",
       "   'Person in red cheers raising hands'],\n",
       "  ['Individual with books walks towards the right.',\n",
       "   'Individual with monitor head displays profile pictures.',\n",
       "   \"Individual with monitor head knocks books from individual's hands.\",\n",
       "   'Individual with monitor head shoves individual against red wall.'],\n",
       "  ['Child with short blond hair walks towards chest of drawers.',\n",
       "   'Child with curly black hair tries to climb black table.',\n",
       "   'Two children in purple pyjamas try to climb chest of drawers.',\n",
       "   'One child in purple pyjamas opens uppermost drawer of chest.',\n",
       "   'Toys and dolls are scattered on the floor.',\n",
       "   'Child in pink dress approaches black table.',\n",
       "   'Child risks furniture tumbling while climbing.',\n",
       "   'Child potentially injures self by climbing furniture.',\n",
       "   'Child in beige t-shirt carries pants.',\n",
       "   'Child near yellow toy truck introduces falling risk.'],\n",
       "  ['Blonde-haired person stands on jumping platform.',\n",
       "   'Person faces left.',\n",
       "   'Person raises hands.',\n",
       "   'Person jumps from platform.',\n",
       "   'Person wraps arms around legs.',\n",
       "   'Person performs a spin in mid-air.',\n",
       "   'Person dives into the water.',\n",
       "   'Person enters water hand-first.',\n",
       "   \"Person's feet go up.\",\n",
       "   'Splash occurs in water.'],\n",
       "  ['Gray rabbit directs purple squirrel to a location.',\n",
       "   'Purple squirrel carries hazelnut.',\n",
       "   'Purple squirrel displays fear.',\n",
       "   'Purple squirrel stops at green bush.',\n",
       "   'Purple squirrel inspects green bush.',\n",
       "   'Bush reveals itself as wooden trap.',\n",
       "   'Wooden trap springs.',\n",
       "   'Wooden trap launches purple squirrel into the air.',\n",
       "   'Purple squirrel drops hazelnut.']],\n",
       " 'n_subjects': [2, 3, 2, 1, 1, 4, 2, 4, 2, 2],\n",
       " 'n_shots': [6, 9, 4, 4, 1, 4, 4, 6, 1, 2],\n",
       " 'n_events': [5, 7, 8, 7, 10, 9, 4, 10, 10, 9]}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the last ten rows of the 'test' split (returned as a dict of column lists).\n",
    "ds['test'][-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8ed3f332-7c71-4b2c-875c-42210f7f8080",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing entries: 100%|██████████| 320/320 [00:00<00:00, 5401.92entry/s]\n",
      "Processing entries: 100%|██████████| 80/80 [00:00<00:00, 5551.69entry/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data has been successfully written to train_output.json and val_output.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "\n",
    "# `ds` is expected to be a Hugging Face DatasetDict,\n",
    "# e.g. ds = load_dataset('your_dataset_name')\n",
    "\n",
    "# Take the 'test' split, shuffle it with a fixed seed so the result is\n",
    "# reproducible, then keep only the first 400 shuffled rows.\n",
    "ds_test = ds['test'].shuffle(seed=42).select(range(400))\n",
    "\n",
    "# Index of the first validation row: the leading 80% is used for training.\n",
    "split_point = int(len(ds_test) * 0.8)\n",
    "\n",
    "# Partition the subset into consecutive train / validation index ranges.\n",
    "train_indices = range(split_point)\n",
    "val_indices = range(split_point, len(ds_test))\n",
    "train_data = ds_test.select(train_indices)\n",
    "val_data = ds_test.select(val_indices)\n",
    "\n",
    "def format_entries(dataset):\n",
    "    \"\"\"Convert dataset rows into chat-style records that reference a video file.\n",
    "\n",
    "    Each row must provide 'description', 'events' (an iterable of strings) and\n",
    "    'video_file' (a '/'-separated path string).\n",
    "\n",
    "    Returns a list with one dict per row containing:\n",
    "      - 'messages': the fixed user prompt followed by an assistant reply built\n",
    "        from the description and the newline-joined events;\n",
    "      - 'videos': a one-element list with the row's file name placed under\n",
    "        'video/DREAM-1K_videos/'.\n",
    "    \"\"\"\n",
    "    user_prompt = \"<video>Please describe this video, mention who are the characters in it, what they did, and what is the background?\"\n",
    "    records = []\n",
    "    for row in tqdm(dataset, desc=\"Processing entries\", unit=\"entry\"):\n",
    "        answer = f\"Description: {row['description']}\\nEvents:\\n\" + \"\\n\".join(row['events'])\n",
    "        # Keep only the final path component of the dataset's video path.\n",
    "        file_name = row['video_file'].split('/')[-1]\n",
    "        user_turn = {\"content\": user_prompt, \"role\": \"user\"}\n",
    "        assistant_turn = {\"content\": answer, \"role\": \"assistant\"}\n",
    "        records.append({\n",
    "            \"messages\": [user_turn, assistant_turn],\n",
    "            \"videos\": [f\"video/DREAM-1K_videos/{file_name}\"],\n",
    "        })\n",
    "    return records\n",
    "\n",
    "# Output locations. The completion message below is built from these names so\n",
    "# it always reports the files that were actually written.\n",
    "# NOTE(review): the validation split is stored as 'test_output.json'; the name is\n",
    "# kept unchanged here - confirm whether downstream code expects 'val_output.json'.\n",
    "train_path = 'train_output.json'\n",
    "val_path = 'test_output.json'\n",
    "\n",
    "# Format and save the training split as pretty-printed UTF-8 JSON.\n",
    "formatted_train_data = format_entries(train_data)\n",
    "with open(train_path, 'w', encoding='utf-8') as f:\n",
    "    json.dump(formatted_train_data, f, ensure_ascii=False, indent=4)\n",
    "\n",
    "# Format and save the validation split in the same way.\n",
    "formatted_val_data = format_entries(val_data)\n",
    "with open(val_path, 'w', encoding='utf-8') as f:\n",
    "    json.dump(formatted_val_data, f, ensure_ascii=False, indent=4)\n",
    "\n",
    "print(f\"Data has been successfully written to {train_path} and {val_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d884ab25-21eb-4a2f-a0c2-818fcf3d01f2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
